From 522cb75c540b20cbdaaae1c60d856812673897f0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Fri, 8 Sep 2017 21:26:04 +0200 Subject: [PATCH] babl: add icc meta-data extraction functions Instead of extracting all meta-data of ICC profiles and attaching them to BablSpaces, we provide an string key/value API to query meta data of in-memory ICC profiles. --- babl/babl-icc.c | 308 ++++++++++++++++++++++++++++++++------- babl/babl-space.h | 2 - babl/babl.h | 34 ++++- tools/babl-icc-rewrite.c | 40 ++++- 4 files changed, 318 insertions(+), 66 deletions(-) diff --git a/babl/babl-icc.c b/babl/babl-icc.c index 0bf74cb..7d79051 100644 --- a/babl/babl-icc.c +++ b/babl/babl-icc.c @@ -578,7 +578,27 @@ const char *babl_space_to_icc (const Babl *babl, int *ret_length) return icc; } -static char *icc_decode_mluc (ICC *state, int offset, int element_length, char *lang, char *country) + +typedef uint32_t UTF32; +typedef uint16_t UTF16; +typedef uint8_t UTF8; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +static int ConvertUTF16toUTF8 (const UTF16** sourceStart, + const UTF16* sourceEnd, + UTF8** targetStart, + UTF8* targetEnd, + ConversionFlags flags); + +static char *icc_decode_mluc (ICC *state, + int offset, + int element_length, + const char *lang, + const char *country) { int n_records = icc_read (u32, offset + 8); int record_size = icc_read (u32, offset + 12); @@ -598,32 +618,60 @@ static char *icc_decode_mluc (ICC *state, int offset, int element_length, char * (!country || !strcmp (country, icountry))) || (i == n_records - 1)) { - int slength = icc_read(u32, offset + o + 4); + int slength = (icc_read(u32, offset + o + 4))/2; int soffset = icc_read(u32, offset + o + 8); - char *ret = babl_malloc (slength * 2); + UTF16 *tmp_ret = babl_calloc (sizeof (uint16_t), slength + 1); + UTF16 *tmp_ret2 = tmp_ret; + unsigned char *ret = babl_calloc (1, slength * 4 + 1); // worst case scenario + unsigned char *ret2 = ret; int j; - for (j = 0; j < slength/2; j++) + for (j = 0; j < slength; j++) { - int hi = icc_read(u8, offset + soffset + j * 2 + 0); - int lo = icc_read(u8, offset + soffset + j * 2 + 1); - - ret[j] = lo + hi * 0; // only ASCII survives this - // brute utf16 decoding, so it is - // good we ask for english. + tmp_ret[j] = icc_read(u16, offset + soffset + j * 2); } - ret[j] = 0; - return ret; + tmp_ret[j] = 0; + memset (ret, 0, slength * 4 + 1); + ConvertUTF16toUTF8 ((void*)&tmp_ret2, tmp_ret + slength, &ret2, ret + slength, lenientConversion); + babl_free(tmp_ret); + { // trim down to actually used utf8 + unsigned char *tmp = (void*)strdup ((void*)ret); + babl_free (ret); + ret = tmp; + } + return (void*)ret; } o+=record_size; } - return babl_strdup (""); + return NULL; +} + +static char *decode_string (ICC *state, const char *tag, const char *lang, const char *country) +{ + int offset, element_size; + + if (!icc_tag (state, tag, &offset, &element_size)) + return NULL; + + if (!strcmp (state->data + offset, "mluc")) + { + return icc_decode_mluc (state, offset, element_size, lang, country); + } + else if (!strcmp (state->data + offset, "text")) + { + return strdup (state->data + offset + 8); + } + else if (!strcmp (state->data + offset, "desc")) + { + return strdup (state->data + offset + 12); + } + return NULL; } const Babl * -babl_space_from_icc (const char *icc_data, - int icc_length, - const char **error) +babl_space_from_icc (const char *icc_data, + int icc_length, + const char **error) { ICC *state = icc_state_new ((char*)icc_data, icc_length, 0); int profile_size = icc_read (u32, 0); @@ -632,8 +680,6 @@ babl_space_from_icc (const char *icc_data, const Babl *trc_green = NULL; const Babl *trc_blue = NULL; const char *int_err; - char *descr = NULL; - char *copyright = NULL; Babl *ret = NULL; sign_t profile_class, color_space; @@ -691,34 +737,6 @@ babl_space_from_icc (const char *icc_data, return NULL; } - { - int offset, element_size; - icc_tag (state, "desc", &offset, &element_size); - if (!strcmp (state->data + offset, "mluc")) - { - descr = icc_decode_mluc (state, offset, element_size, "en", NULL); - } - else - if (!strcmp (state->data + offset, "desc")) - { - descr = babl_strdup (state->data + offset + 12); - } - } - - { - int offset, element_size; - icc_tag (state, "cprt", &offset, &element_size); - if (!strcmp (state->data + offset, "mluc")) - { - copyright = icc_decode_mluc (state, offset, element_size, "en", NULL); - } - else - if (!strcmp (state->data + offset, "desc")) - { - copyright = babl_strdup (state->data + offset + 8); - } - } - if (icc_tag (state, "rXYZ", NULL, NULL) && icc_tag (state, "gXYZ", NULL, NULL) && icc_tag (state, "bXYZ", NULL, NULL) && @@ -763,8 +781,6 @@ babl_space_from_icc (const char *icc_data, ry, gy, by, rz, gz, bz, trc_red, trc_green, trc_blue); - ret->space.description = descr; - ret->space.copyright = copyright; babl_free (state); return ret; @@ -813,8 +829,6 @@ babl_space_from_icc (const char *icc_data, green_x, green_y, blue_x, blue_y, trc_red, trc_green, trc_blue); - ret->space.description = descr; - ret->space.copyright = copyright; return ret; } } @@ -847,3 +861,197 @@ static void symmetry_test (ICC *state) assert (icc_read (u32, 8) == 4); } +char *babl_icc_get_key (const char *icc_data, + int icc_length, + const char *key, + const char *language, + const char *country) +{ + char *ret = NULL; + ICC *state = icc_state_new ((void*)icc_data, icc_length, 0); + + if (!state) + return ret; + + if (!strcmp (key, "copyright") || + !strcmp (key, "cprt")) + { + ret = decode_string (state, "cprt", language, country); + + } else if (!strcmp (key, "description") || + !strcmp (key, "profileDescriptionTag") || + !strcmp (key, "desc")) + { + ret = decode_string (state, "desc", language, country); + + } else if (!strcmp (key, "manufacturer") || + !strcmp (key, "deviceMfgDescTag") || + !strcmp (key, "dmnd")) + { + ret = decode_string (state, "dmnd", language, country); + + } else if (!strcmp (key, "device") || + !strcmp (key, "deviceModelDescTag") || + !strcmp (key, "dmdd")) + { + ret = decode_string (state, "dmdd", language, country); + } else if (!strcmp (key, "class") || + !strcmp (key, "profile-class")) + { + sign_t tag = icc_read (sign, 12); + return strdup (tag.str); + } else if (!strcmp (key, "color-space")) + { + sign_t tag = icc_read (sign, 16); + return strdup (tag.str); + } else if (!strcmp (key, "pcs")) + { + sign_t tag = icc_read (sign, 20); + return strdup (tag.str); + } else if (!strcmp (key, "intent")) + { + char tag[5]; + int val = icc_read (u32, 64); + sprintf (tag, "%i", val); + return strdup (tag); + } else if (!strcmp (key, "tags")) + { + char tag[4096]="NYI"; + return strdup (tag); + } + babl_free (state); + return ret; +} + + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + Sep 2017: copied only the bits neccesary for utf16toutf8 into babl, + otherwise unchanged from upstream. + + See the header file "ConvertUTF.h" for complete documentation. + +------------------------------------------------------------------------ */ + +typedef uint32_t UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ +typedef unsigned char Boolean; /* 0 or 1 */ +typedef enum { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD + + +#define UNI_SUR_HIGH_START (UTF32)0xD800 +#define UNI_SUR_HIGH_END (UTF32)0xDBFF +#define UNI_SUR_LOW_START (UTF32)0xDC00 +#define UNI_SUR_LOW_END (UTF32)0xDFFF +static const int halfShift = 10; /* used for shifting by 10 bits */ + +static const UTF32 halfBase = 0x0010000UL; +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +static int ConvertUTF16toUTF8 (const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) +{ + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} diff --git a/babl/babl-space.h b/babl/babl-space.h index 70ed535..6aeefb6 100644 --- a/babl/babl-space.h +++ b/babl/babl-space.h @@ -62,8 +62,6 @@ typedef struct * standard should win. */ - char *description; /* meta data possibly read from ICC */ - char *copyright; } BablSpace; static inline void babl_space_to_xyzf (const Babl *space, const float *rgb, float *xyz) diff --git a/babl/babl.h b/babl/babl.h index 5d16b35..80f342b 100644 --- a/babl/babl.h +++ b/babl/babl.h @@ -95,22 +95,44 @@ const Babl * babl_space (const char *name); * * @icc_data: pointer to icc profile in memory * @icc_length: length of icc profile in bytes + * @intent: the intent from the ICC profile to use. + * " @error: pointer to a string where decoding errors can be stored, * if an error occurs, NULL is returned and an error message * is provided in error. * * Create a babl space from an in memory ICC profile, the * profile does no longer need to be loaded for the space to work, - * multiple calls with the same icc profile will result in the - * same space. + * multiple calls with the same icc profile and same icc_transform + * will result in the same space. * * If a BablSpace cannot be created from the profile NULL is returned and a * static string is set on the const char *value pointed at with &value - * containing a message describing why the icc does not yield a babl space. + * containing a message describing why the provided data does not yield a babl + * space. */ -const Babl *babl_space_from_icc (const char *icc_data, - int icc_length, - const char **error); +const Babl *babl_space_from_icc (const char *icc_data, + int icc_length, + const char **error); + +/* babl_icc_get_key: + * + * @icc_data: pointer to in-memory icc profile + * @icc_length: length of icc profile in bytes + * @key: the key we want to quey, see below for some supported values + * @language: 2 char code for language to extract or NULL + * @country: 2 char country code or NULL + * + * Returns NULL if key not found or a malloc allocated utf8 string of the key + * when found, free with free() when done. Supported keys: "description", + * "copyright", "manufacturer", "device", "profile-class", "color-space" and + * "pcs". + */ +char *babl_icc_get_key (const char *icc_data, + int icc_length, + const char *key, + const char *language, + const char *counter); /* babl_space_to_icc: diff --git a/tools/babl-icc-rewrite.c b/tools/babl-icc-rewrite.c index 1d292b4..0746d50 100644 --- a/tools/babl-icc-rewrite.c +++ b/tools/babl-icc-rewrite.c @@ -36,6 +36,8 @@ main (int argc, long icc_len; int genlen; const char *error; + const char *la = NULL; + const char *co = NULL; babl_init (); if (!argv[1] || !argv[2]) @@ -47,6 +49,36 @@ main (int argc, if (file_get_contents (argv[1], &icc_data, &icc_len, NULL)) return -1; + { + char *description = babl_icc_get_key (icc_data, icc_len, "description", la, co); + if (description) + fprintf (stderr, "description: %s\n", description); + } + + { + char *str = babl_icc_get_key (icc_data, icc_len, "copyright", la, co); + if (str) + { + fprintf (stderr, "copyright: %s\n", str); + free (str); + } + } + { + char *str = babl_icc_get_key (icc_data, icc_len, "device", la, co); + if (str) + { + fprintf (stderr, "device: %s\n", str); + free (str); + } + } + { + char *str = babl_icc_get_key (icc_data, icc_len, "manufacturer", la, co); + if (str) + { + fprintf (stderr, "manufacturer: %s\n", str); + free (str); + } + } babl = babl_space_from_icc (icc_data, icc_len, &error); free (icc_data); if (error || !babl) @@ -55,14 +87,6 @@ main (int argc, return -1; } - { - BablSpace *space = (void*)babl; - if (space->description) - fprintf (stderr, "description: %s\n", space->description); - if (space->copyright) - fprintf (stderr, "copyright: %s\n", space->copyright); - } - icc_data = (char *)babl_space_to_icc (babl, &genlen); if (icc_data) { -- 2.30.2